{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Principal Component Analysis\n",
    "PCA implementation using eigen and SVD\n",
    "\n",
    "ref [1](http://agnesmustar.com/2017/11/01/principal-component-analysis-pca-implemented-pytorch/)\n",
    "\n",
    "ref [2](https://zhuanlan.zhihu.com/p/58064462)\n",
    "\n",
    "ref [3](https://blog.csdn.net/Dark_Scope/article/details/53150883)\n",
    "\n",
    "ref [4](https://blog.csdn.net/Little_Fire/article/details/80445987) Eigende Composition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.manual_seed(886)\n",
    "m=5 # num of sample\n",
    "n=8 # dim of one sample\n",
    "ratio=0.99"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.1389,  0.3233,  0.1166, -0.4258,  0.2467,  0.0271, -0.1988,  0.4109],\n",
       "        [ 0.2700, -0.4517, -0.4135,  0.4607,  0.0278, -0.2408, -0.2052, -0.0065],\n",
       "        [ 0.1202,  0.3165, -0.4434,  0.0486, -0.2088,  0.3180,  0.0377,  0.0705],\n",
       "        [-0.2457, -0.2703, -0.0092,  0.3814,  0.2892, -0.0494,  0.3654, -0.3693],\n",
       "        [ 0.3405, -0.2373, -0.1980,  0.3845, -0.2701, -0.1027, -0.2083,  0.1383]])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "src = torch.rand((m,n))\n",
    "# 1. centerize (n,m)\n",
    "src = src - src.mean()\n",
    "src"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Eigen\n",
    "$$ X^TX = VEV^T$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2. conv (m,m)\n",
    "conv = torch.mm(src.t(), src)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3. eigen\n",
    "# eigen value is sorted, ascending\n",
    "# eigen vector is column by column \n",
    "e, v = conv.symeig(eigenvectors=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# check eig\n",
    "rec_conv = torch.matmul(torch.matmul(v, torch.diag(e)), torch.t(v))\n",
    "assert conv.allclose(rec_conv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "perfect k is 4\n"
     ]
    }
   ],
   "source": [
    "# find the perfect k\n",
    "total = e.sum()\n",
    "for k in range(1, n+1):\n",
    "    if e[-k:].sum()/total >= ratio:\n",
    "        print('perfect k is', k)\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.2578,  0.2384,  0.1905,  0.6529],\n",
       "        [-0.2050,  0.1887,  0.2000, -0.7995],\n",
       "        [-0.1288, -0.5249,  0.4153,  0.0067],\n",
       "        [-0.1565, -0.1421, -0.6577, -0.3916],\n",
       "        [ 0.1006,  0.1023,  0.4043, -0.5562]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 4. compute pca (m,k)\n",
    "pca_eig = torch.mm(src, v[:,-k:])\n",
    "pca_eig"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## SVD\n",
    "$$X=U \\Sigma V^T$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[-0.6529, -0.1905,  0.2384, -0.2578],\n",
       "        [ 0.7995, -0.2000,  0.1887, -0.2050],\n",
       "        [-0.0067, -0.4153, -0.5249, -0.1288],\n",
       "        [ 0.3916,  0.6577, -0.1421, -0.1565],\n",
       "        [ 0.5562, -0.4043,  0.1023,  0.1006]])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "u, s, v = src.svd()\n",
    "# s is sorted, but is descending\n",
    "pca_svd = torch.mm(src, v[:,:k])\n",
    "pca_svd"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
